pacman::p_load(tidyverse, lubridate, gridExtra, readxl, knitr, data.table, ggplot2, forecast, MLmetrics, tsbox, xts, plotly, hrbrthemes, astsa, ggfortify)Take-home Exercise 4: Prototyping Time Series Module for Visual Analytics Shiny Application
1 About this Exercise
In this exercise, we will be preparing the time-series forecasting module of the proposed Shiny Application and complete the following task:
Evaluate and determine the necessary R packages needed for my group project’s Shiny application;
Prepare and test the specific R codes can be run and returned the correct output as expected;
Determine the parameters and outputs that will be exposed on the Shiny applications; and
Select the appropriate Shiny UI components for exposing the parameters determined above.
2 Getting Started
2.1 Loading R Packages
2.2 Importing Data Into R
Talk about how we get this data: made use of data.gov.sg’s API
data <- read_csv("data/daily_historical.csv")glimpse(data)Rows: 329,156
Columns: 13
$ station <chr> "Macritchie Reservoir", "Macritchie Reservoir…
$ year <dbl> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 198…
$ month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ day <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…
$ daily_rainfall_total <dbl> 0.0, 0.0, 0.0, 0.0, 22.6, 49.6, 2.4, 0.0, 0.0…
$ highest_30_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highest_60_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highest_120_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mean_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ maximum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ minimum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mean_wind_speed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ max_wind_speed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
2.3 Creating a date column
data$tdate <- paste(data$year, "-", data$month, "-", data$day)
data <- data %>%
mutate(tdate = ymd(tdate))
glimpse(data)Rows: 329,156
Columns: 14
$ station <chr> "Macritchie Reservoir", "Macritchie Reservoir…
$ year <dbl> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 198…
$ month <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ day <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14…
$ daily_rainfall_total <dbl> 0.0, 0.0, 0.0, 0.0, 22.6, 49.6, 2.4, 0.0, 0.0…
$ highest_30_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highest_60_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ highest_120_min_rainfall <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mean_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ maximum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ minimum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ mean_wind_speed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ max_wind_speed <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01-…
3 Temperature Data
3.1 Selecting the relevant columns for Temperature Data
temp <- data %>%
select(station, tdate, mean_temperature, maximum_temperature, minimum_temperature)
glimpse(temp)Rows: 329,156
Columns: 5
$ station <chr> "Macritchie Reservoir", "Macritchie Reservoir", "M…
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01-04, 1…
$ mean_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ maximum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ minimum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
3.2 Checking for missing values
summary(temp) station tdate mean_temperature maximum_temperature
Length:329156 Min. :1980-01-01 Min. :22.20 Min. :22.80
Class :character 1st Qu.:1997-04-29 1st Qu.:27.10 1st Qu.:30.50
Mode :character Median :2011-09-18 Median :27.90 Median :31.70
Mean :2007-07-02 Mean :27.87 Mean :31.49
3rd Qu.:2017-11-27 3rd Qu.:28.80 3rd Qu.:32.60
Max. :2023-12-31 Max. :31.50 Max. :38.00
NA's :58 NA's :255645 NA's :255282
minimum_temperature
Min. :20.0
1st Qu.:24.3
Median :25.2
Mean :25.3
3rd Qu.:26.3
Max. :30.0
NA's :255283
drop those rows where date is missing.
temp <- temp %>%
drop_na(tdate)
summary(temp) station tdate mean_temperature maximum_temperature
Length:329098 Min. :1980-01-01 Min. :22.20 Min. :22.80
Class :character 1st Qu.:1997-04-29 1st Qu.:27.10 1st Qu.:30.50
Mode :character Median :2011-09-18 Median :27.90 Median :31.70
Mean :2007-07-02 Mean :27.87 Mean :31.49
3rd Qu.:2017-11-27 3rd Qu.:28.80 3rd Qu.:32.60
Max. :2023-12-31 Max. :31.50 Max. :38.00
NA's :255587 NA's :255224
minimum_temperature
Min. :20.0
1st Qu.:24.3
Median :25.2
Mean :25.3
3rd Qu.:26.3
Max. :30.0
NA's :255225
unique(temp$station) [1] "Macritchie Reservoir" "Lower Peirce Reservoir"
[3] "Admiralty" "East Coast Parkway"
[5] "Ang Mo Kio" "Newton"
[7] "Lim Chu Kang" "Marine Parade"
[9] "Choa Chu Kang (Central)" "Tuas South"
[11] "Pasir Panjang" "Jurong Island"
[13] "Nicoll Highway" "Botanic Garden"
[15] "Choa Chu Kang (South)" "Whampoa"
[17] "Changi" "Jurong Pier"
[19] "Ulu Pandan" "Mandai"
[21] "Tai Seng" "Jurong (West)"
[23] "Clementi" "Sentosa Island"
[25] "Bukit Panjang" "Kranji Reservoir"
[27] "Upper Peirce Reservoir" "Kent Ridge"
[29] "Queenstown" "Tanjong Katong"
[31] "Somerset (Road)" "Punggol"
[33] "Simei" "Toa Payoh"
[35] "Tuas" "Bukit Timah"
[37] "Pasir Ris (Central)"
missing.values <- temp %>%
gather(key = "key", value = "val") %>%
mutate(isna = is.na(val)) %>%
group_by(key) %>%
mutate(total = n()) %>%
group_by(key, total, isna) %>%
summarise(num.isna = n()) %>%
mutate(pct = num.isna / total * 100)
levels <-
(missing.values %>% filter(isna == T) %>% arrange(desc(pct)))$key
percentage.plot <- missing.values %>%
ggplot() +
geom_bar(aes(x = reorder(key, desc(pct)),
y = pct, fill=isna),
stat = 'identity', alpha=0.8) +
scale_x_discrete(limits = levels) +
scale_fill_manual(name = "",
values = c('steelblue', 'tomato3'), labels = c("Present", "Missing")) +
coord_flip() +
labs(title = "Percentage of missing values", x =
'Variable', y = "% of missing values")
percentage.plot
3.3 Further exploration of missing mean_temperature using plotly
temp_mean_wide <- temp %>%
select(tdate, station, mean_temperature) %>%
pivot_wider(names_from = station, values_from = mean_temperature)
glimpse(temp_mean_wide)Rows: 16,071
Columns: 38
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01…
$ `Macritchie Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Lower Peirce Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Admiralty <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `East Coast Parkway` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Ang Mo Kio` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Newton <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Lim Chu Kang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Marine Parade` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Choa Chu Kang (Central)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Tuas South` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Pasir Panjang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Jurong Island` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Nicoll Highway` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Botanic Garden` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Choa Chu Kang (South)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Whampoa <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Changi <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Jurong Pier` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Ulu Pandan` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Mandai <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Tai Seng` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Jurong (West)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Clementi <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Sentosa Island` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Bukit Panjang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Kranji Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Upper Peirce Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Kent Ridge` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Queenstown <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Tanjong Katong` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Somerset (Road)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Punggol <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Simei <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Toa Payoh` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Tuas <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Bukit Timah` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Pasir Ris (Central)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
plot_ly(data = temp_mean_wide,
x = ~tdate,
y = ~ Admiralty,
type = "scatter",
mode = "lines+markers") |>
layout(title = "Temperature observed by Weather Station",
xaxis = list(title = "Date"),
yaxis = list(title = "", range = c(0,40)),
theme_ipsum_rc(plot_title_size = 13, plot_title_margin=4, subtitle_size=11, subtitle_margin=4,
axis_title_size = 8, axis_text_size=8, axis_title_face= "bold", plot_margin = margin(4, 4, 4, 4)),
updatemenus = list(list(type = 'dropdown',
xref = "paper",
yref = "paper",
xanchor = "left",
x = 0.04,
y = 0.95,
buttons = list(
list(method = "update",
args = list(list(y = list(temp_mean_wide$Admiralty)),
list(yaxis = list(title = "Temperature in Admiralty", range = c(0,40)))),label = "Admiralty"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`East Coast Parkway`)),
list(yaxis = list(title = "Temperature in East Coast Parkway", range = c(0,40)))),label = "East Coast Parkway"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Ang Mo Kio`)),
list(yaxis = list(title = "Temperature in Ang Mo Kio", range = c(0,40)))),label = "Ang Mo Kio"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Newton)),
list(yaxis = list(title = "Temperature in Newton", range = c(0,40)))),label = "Newton"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Tuas South`)),
list(yaxis = list(title = "Temperature in Tuas South", range = c(0,40)))),label = "Tuas South"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Pasir Panjang`)),
list(yaxis = list(title = "Temperature in Pasir Panjang", range = c(0,40)))),label = "Pasir Panjang"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Jurong Island`)),
list(yaxis = list(title = "Temperature in Jurong Island", range = c(0,40)))),label = "Jurong Island"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Choa Chu Kang (South)`)),
list(yaxis = list(title = "Temperature in Choa Chu Kang (South)", range = c(0,40)))),label = "Choa Chu Kang (South)"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Changi)),
list(yaxis = list(title = "Temperature in Changi", range = c(0,40)))),label = "Changi"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Tai Seng`)),
list(yaxis = list(title = "Temperature in Tai Seng", range = c(0,40)))),label = "Tai Seng"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Jurong (West)`)),
list(yaxis = list(title = "Temperature in Jurong West", range = c(0,40)))),label = "Jurong West"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Clementi)),
list(yaxis = list(title = "Temperature in Clementi", range = c(0,40)))),label = "Clementi"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Sentosa Island`)),
list(yaxis = list(title = "Temperature in Sentosa", range = c(0,40)))),label = "Sentosa"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Macritchie Reservoir`)),
list(yaxis = list(title = "Temperature at Macritchie Reservoir", range = c(0,40)))),label = "Macritchie Reservoir"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Lower Peirce Reservoir`)),
list(yaxis = list(title = "Temperature at Lower Peirce Reservoir", range = c(0,40)))),label = "Lower Peirce Reservoir"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Lim Chu Kang`)),
list(yaxis = list(title = "Temperature at Lim Chu Kang", range = c(0,40)))),label = "Lim Chu Kang"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Marine Parade`)),
list(yaxis = list(title = "Temperature at Marine Parade", range = c(0,40)))),label = "Marine Parade"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Choa Chu Kang (Central)`)),
list(yaxis = list(title = "Temperature at Choa Chu Kang (Central)", range = c(0,40)))),label = "Choa Chu Kang (Central)"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Choa Chu Kang (Central)`)),
list(yaxis = list(title = "Temperature at Choa Chu Kang (Central)", range = c(0,40)))),label = "Choa Chu Kang (Central)"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Nicoll Highway`)),
list(yaxis = list(title = "Temperature at Nicoll Highway", range = c(0,40)))),label = "Nicoll Highway"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Botanic Garden`)),
list(yaxis = list(title = "Temperature at Botanic Garden", range = c(0,40)))),label = "Botanic Garden"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Whampoa)),
list(yaxis = list(title = "Temperature at Whampoa", range = c(0,40)))),label = "Whampoa"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Jurong Pier`)),
list(yaxis = list(title = "Temperature at Jurong Pier", range = c(0,40)))),label = "Jurong Pier"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Ulu Pandan`)),
list(yaxis = list(title = "Temperature at Ulu Pandan", range = c(0,40)))),label = "Ulu Pandan"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Mandai)),
list(yaxis = list(title = "Temperature at Mandai", range = c(0,40)))),label = "Mandai"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Bukit Panjang`)),
list(yaxis = list(title = "Temperature at Bukit Panjang", range = c(0,40)))),label = "Bukit Panjang"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Kranji Reservoir`)),
list(yaxis = list(title = "Temperature at Kranji Reservoir", range = c(0,40)))),label = "Kranji Reservoir"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Upper Peirce Reservoir`)),
list(yaxis = list(title = "Temperature at Upper Peirce Reservoir", range = c(0,40)))),label = "Upper Peirce Reservoir"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Kent Ridge`)),
list(yaxis = list(title = "Temperature at Kent Ridge", range = c(0,40)))),label = "Kent Ridge"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$Queenstown)),
list(yaxis = list(title = "Temperature at Queenstown", range = c(0,40)))),label = "Queenstown"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Tanjong Katong`)),
list(yaxis = list(title = "Temperature at Tanjong Katong", range = c(0,40)))),label = "Tanjong Katong"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Somerset (Road)`)),
list(yaxis = list(title = "Temperature at Somerset (Road)", range = c(0,40)))),label = "Somerset (Road)"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Punggol`)),
list(yaxis = list(title = "Temperature at Punggol", range = c(0,40)))),label = "Punggol"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Simei`)),
list(yaxis = list(title = "Temperature at Simei", range = c(0,40)))),label = "Simei"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Toa Payoh`)),
list(yaxis = list(title = "Temperature at Toa Payoh", range = c(0,40)))),label = "Toa Payoh"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Tuas`)),
list(yaxis = list(title = "Temperature at Tuas", range = c(0,40)))),label = "Tuas"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Bukit Timah`)),
list(yaxis = list(title = "Temperature at Bukit Timah", range = c(0,40)))),label = "Bukit Timah"),
list(method = "update",
args = list(list(y = list(temp_mean_wide$`Pasir Ris (Central)`)),
list(yaxis = list(title = "Temperature at Pasir Ris (Central)", range = c(0,40)))),label = "Pasir Ris (Central)")
)))) Seems like there are some weather stations with no temperature data, some weather stations have temperature data for certain years, and some stations have temperature data from 1984 onwards.
Let’s explore further.
missing.values <- temp_mean_wide %>%
gather(key = "key", value = "val") %>%
mutate(isna = is.na(val)) %>%
group_by(key) %>%
mutate(total = n()) %>%
group_by(key, total, isna) %>%
summarise(num.isna = n()) %>%
mutate(pct = num.isna / total * 100)
levels <-
(missing.values %>% filter(isna == T) %>% arrange(desc(pct)))$key
percentage.plot <- missing.values %>%
ggplot() +
geom_bar(aes(x = reorder(key, desc(pct)),
y = pct, fill=isna),
stat = 'identity', alpha=0.8) +
scale_x_discrete(limits = levels) +
scale_fill_manual(name = "",
values = c('steelblue', 'tomato3'), labels = c("Present", "Missing")) +
coord_flip() +
labs(title = "Percentage of missing values", x =
'Variable', y = "% of missing values")
percentage.plot
Drop the stations with no weather data at all.
notempdata <- missing.values %>%
filter(isna == TRUE & pct==100)
notempdata$key [1] "Botanic Garden" "Bukit Panjang"
[3] "Bukit Timah" "Choa Chu Kang (Central)"
[5] "Jurong Pier" "Kent Ridge"
[7] "Kranji Reservoir" "Lim Chu Kang"
[9] "Lower Peirce Reservoir" "Macritchie Reservoir"
[11] "Mandai" "Marine Parade"
[13] "Nicoll Highway" "Pasir Ris (Central)"
[15] "Punggol" "Queenstown"
[17] "Simei" "Somerset (Road)"
[19] "Tanjong Katong" "Toa Payoh"
[21] "Tuas" "Ulu Pandan"
[23] "Upper Peirce Reservoir" "Whampoa"
stationstoremove <- c("Botanic Garden","Bukit Panjang","Bukit Timah","Choa Chu Kang (Central)","Jurong Pier","Kent Ridge", "Kranji Reservoir", "Lim Chu Kang", "Lower Peirce Reservoir", "Macritchie Reservoir","Mandai", "Marine Parade","Nicoll Highway", "Pasir Ris (Central)", "Punggol", "Queenstown","Simei", "Somerset (Road)","Tanjong Katong", "Toa Payoh", "Tuas", "Ulu Pandan", "Upper Peirce Reservoir","Whampoa")
#create a operator to exclude things
'%!in%' <- function(x,y)!('%in%'(x,y))
#excluded stations that have no temp data at all
temp_clean <- temp %>%
filter(station %!in% stationstoremove)
glimpse(temp_clean)Rows: 120,139
Columns: 5
$ station <chr> "Admiralty", "Admiralty", "Admiralty", "Admiralty"…
$ tdate <date> 2009-01-01, 2009-01-02, 2009-01-03, 2009-01-04, 2…
$ mean_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ maximum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ minimum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
temp_mean_widec <- temp_clean %>%
select(tdate, station, mean_temperature) %>%
pivot_wider(names_from = station, values_from = mean_temperature)
glimpse(temp_mean_widec)Rows: 16,071
Columns: 14
$ tdate <date> 2009-01-01, 2009-01-02, 2009-01-03, 2009-01-0…
$ Admiralty <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `East Coast Parkway` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Ang Mo Kio` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ Newton <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Tuas South` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Pasir Panjang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Jurong Island` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Choa Chu Kang (South)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ Changi <dbl> 26.6, 26.4, 26.5, 26.3, 27.0, 27.4, 27.1, 27.0…
$ `Tai Seng` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Jurong (West)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ Clementi <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ `Sentosa Island` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
plot_ly(data = temp_mean_widec,
x = ~tdate,
y = ~ Admiralty,
type = "scatter",
mode = "lines+markers") |>
layout(title = "Temperature observed by Weather Station",
xaxis = list(title = "Date"),
yaxis = list(title = "", range = c(0,40)),
theme_ipsum_rc(plot_title_size = 13, plot_title_margin=4, subtitle_size=11, subtitle_margin=4,
axis_title_size = 8, axis_text_size=8, axis_title_face= "bold", plot_margin = margin(4, 4, 4, 4)),
updatemenus = list(list(type = 'dropdown',
xref = "paper",
yref = "paper",
xanchor = "left",
x = 0.04,
y = 0.95,
buttons = list(
list(method = "update",
args = list(list(y = list(temp_mean_widec$Admiralty)),
list(yaxis = list(title = "Temperature in Admiralty", range = c(0,40)))),label = "Admiralty"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`East Coast Parkway`)),
list(yaxis = list(title = "Temperature in East Coast Parkway", range = c(0,40)))),label = "East Coast Parkway"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Ang Mo Kio`)),
list(yaxis = list(title = "Temperature in Ang Mo Kio", range = c(0,40)))),label = "Ang Mo Kio"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$Newton)),
list(yaxis = list(title = "Temperature in Newton", range = c(0,40)))),label = "Newton"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Tuas South`)),
list(yaxis = list(title = "Temperature in Tuas South", range = c(0,40)))),label = "Tuas South"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Pasir Panjang`)),
list(yaxis = list(title = "Temperature in Pasir Panjang", range = c(0,40)))),label = "Pasir Panjang"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Jurong Island`)),
list(yaxis = list(title = "Temperature in Jurong Island", range = c(0,40)))),label = "Jurong Island"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Choa Chu Kang (South)`)),
list(yaxis = list(title = "Temperature in Choa Chu Kang", range = c(0,40)))),label = "Choa Chu Kang"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$Changi)),
list(yaxis = list(title = "Temperature in Changi", range = c(0,40)))),label = "Changi"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Tai Seng`)),
list(yaxis = list(title = "Temperature in Tai Seng", range = c(0,40)))),label = "Tai Seng"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Jurong (West)`)),
list(yaxis = list(title = "Temperature in Jurong West", range = c(0,40)))),label = "Jurong West"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$Clementi)),
list(yaxis = list(title = "Temperature in Clementi", range = c(0,40)))),label = "Clementi"),
list(method = "update",
args = list(list(y = list(temp_mean_widec$`Sentosa Island`)),
list(yaxis = list(title = "Temperature in Sentosa", range = c(0,40)))),label = "Sentosa")
)))) There are some missing time gaps in the data. Daily data for data ranging over more than 20 years is too frequent for time series forecasting.
3.4 Preparing for time series forecasting
We are using changi weather station for now
changi <- temp %>%
filter(station == "Changi")
glimpse(changi)Rows: 16,071
Columns: 5
$ station <chr> "Changi", "Changi", "Changi", "Changi", "Changi", …
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01-04, 1…
$ mean_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ maximum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ minimum_temperature <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
changi <- changi %>%
drop_na()%>%
select(tdate, mean_temperature, maximum_temperature, minimum_temperature)
glimpse(changi)Rows: 15,340
Columns: 4
$ tdate <date> 1982-01-01, 1982-01-02, 1982-01-03, 1982-01-04, 1…
$ mean_temperature <dbl> 25.3, 24.7, 25.7, 26.3, 25.8, 23.7, 23.7, 24.4, 25…
$ maximum_temperature <dbl> 29.4, 26.2, 27.2, 29.8, 28.8, 24.9, 25.2, 27.6, 28…
$ minimum_temperature <dbl> 23.0, 23.5, 24.0, 24.1, 23.5, 21.9, 22.4, 22.8, 23…
range(changi$tdate)[1] "1982-01-01" "2023-12-31"
3.5 Convert df to timeseries objec
ts_regular gives the time series a regular interval by adding NA values for missing dates na.fill function fills those missing dates by extending values from previous days The window() function clips off the starting and ending dates so the number of years covered is a multiple of four. This will be needed later when the data needs to be aggregated into monthly periods.
changi_ts <- xts(changi[,c("mean_temperature", "maximum_temperature", "minimum_temperature")], order.by=as.Date(changi$tdate))
changi_ts <- ts_regular(changi_ts)
changi_ts <- na.fill(changi_ts, "extend")
changi_ts <- window(changi_ts, start = as.Date("1983-01-01"), end = as.Date("2023-12-31"))Check the class of changi_ts
class(changi_ts)[1] "xts" "zoo"
str(changi_ts)An xts object on 1983-01-01 / 2023-12-31 containing:
Data: double [14975, 3]
Columns: mean_temperature, maximum_temperature, minimum_temperature
Index: Date [14975] (TZ: "UTC")
A plot() of mean, max and min temperatures show the annual cycle of temperatures as well as extreme temperature events that spike above the general curve.
The ts_ts() function is used to plot ts objects rather than xts objects so that more control is available over the formatting of the charts.
plot(ts_ts(changi_ts$maximum_temperature), col="darkred", bty="n", las=1, fg=NA,
ylim=c(20, 40), ylab="Temperature (C)")
lines(ts_ts(changi_ts$minimum_temperature), col="navy")
lines(ts_ts(changi_ts$mean_temperature), col="darkgreen")
grid(nx=NA, ny=NULL, lty=1, col="gray")
legend("topright", fill=c("darkred", "darkgreen", "navy"), cex=0.7,
legend=c("MAX", "MEAN", "MIN"), bg="white")
3.5.1 Summary Statistics
Running the summary() function will give basic descriptive statistics for the time series fields. You can subset the time series with logical conditions to find the dates for extreme conditions.
summary(changi_ts) Index mean_temperature maximum_temperature minimum_temperature
Min. :1983-01-01 Min. :22.8 Min. :23.60 Min. :20.20
1st Qu.:1993-04-01 1st Qu.:26.9 1st Qu.:30.80 1st Qu.:24.00
Median :2003-07-02 Median :27.7 Median :31.80 Median :24.90
Mean :2003-07-02 Mean :27.7 Mean :31.54 Mean :24.95
3rd Qu.:2013-09-30 3rd Qu.:28.6 3rd Qu.:32.50 3rd Qu.:25.80
Max. :2023-12-31 Max. :30.9 Max. :36.00 Max. :29.10
3.5.2 Seasonal statistics
Time series representation also facilitates summary statistics of recurring periods, like months.
The format() function with the %m format descriptor isolates the month portion of the date. The split() function breaks a vector into a data frame with separate fields based on a specific factor, in this case the month. The as.numeric() conversion is used to remove the time series date index so that dates are not included in the summaries. The sapply() function runs the summary() function on each field in the split data frame.
changi_ts$MONTH <- format(index(changi_ts), "%m")
months <- split(as.numeric(changi_ts$maximum_temperature), changi_ts$MONTH)
sapply(months, summary) 1 2 3 4 5 6 7 8
Min. 23.60000 24.90000 24.60000 26.50000 26.30000 26.50000 25.50000 25.90000
1st Qu. 29.80000 30.92500 31.30000 31.70000 31.60000 31.20000 30.90000 30.70000
Median 30.90000 31.70000 32.40000 32.60000 32.40000 32.10000 31.70000 31.70000
Mean 30.49103 31.58377 32.13698 32.41984 32.24194 31.86537 31.41542 31.38631
3rd Qu. 31.60000 32.40000 33.20000 33.30000 33.10000 32.80000 32.40000 32.30000
Max. 35.20000 35.20000 36.00000 35.80000 35.40000 35.00000 34.00000 34.20000
9 10 11 12
Min. 26.30000 26.40000 24.20000 23.60000
1st Qu. 30.90000 31.00000 30.50000 29.80000
Median 31.70000 31.90000 31.50000 30.90000
Mean 31.47854 31.74673 31.22528 30.48521
3rd Qu. 32.30000 32.70000 32.20000 31.60000
Max. 34.40000 34.60000 34.60000 33.90000
months <- split(as.numeric(changi_ts$minimum_temperature), changi_ts$MONTH)
sapply(months, summary) 1 2 3 4 5 6 7 8
Min. 21.20000 21.10000 20.200 21.40000 21.20000 21.10000 20.50000 21.10000
1st Qu. 23.60000 23.90000 24.100 24.50000 24.90000 24.60000 24.30000 24.20000
Median 24.20000 24.50000 24.900 25.25000 25.70000 25.60000 25.40000 25.40000
Mean 24.20543 24.50017 24.838 25.23528 25.66452 25.57211 25.35083 25.26609
3rd Qu. 24.80000 25.10000 25.600 26.00000 26.60000 26.60000 26.50000 26.40000
Max. 27.00000 26.90000 27.500 28.40000 29.10000 28.40000 28.60000 28.20000
9 10 11 12
Min. 21.00000 20.90000 21.30000 21.20000
1st Qu. 24.10000 24.20000 23.90000 23.70000
Median 25.10000 25.00000 24.50000 24.30000
Mean 25.04301 24.94154 24.49407 24.28521
3rd Qu. 26.10000 25.70000 25.10000 24.90000
Max. 27.90000 28.20000 27.30000 26.60000
months <- split(as.numeric(changi_ts$mean_temperature), changi_ts$MONTH)
sapply(months, summary) 1 2 3 4 5 6 7 8
Min. 22.80000 22.90000 23.70000 25.30000 24.60000 25.3000 24.00000 24.60000
1st Qu. 26.10000 26.70000 27.00000 27.50000 27.80000 27.7000 27.35000 27.30000
Median 26.80000 27.30000 27.80000 28.20000 28.50000 28.5000 28.30000 28.10000
Mean 26.69575 27.24542 27.71243 28.16374 28.52258 28.4239 28.09677 27.99622
3rd Qu. 27.40000 27.80000 28.50000 28.80000 29.30000 29.2000 29.00000 28.80000
Max. 29.70000 29.90000 30.50000 30.70000 30.90000 30.8000 30.20000 30.20000
9 10 11 12
Min. 24.10000 24.70000 23.30000 23.00000
1st Qu. 27.20000 27.10000 26.52500 26.10000
Median 28.00000 27.90000 27.10000 26.80000
Mean 27.85667 27.80913 27.15317 26.69984
3rd Qu. 28.70000 28.50000 27.70000 27.40000
Max. 29.80000 30.40000 30.00000 29.60000
3.5.3 Time Series Decomposition
Time series decomposition separates a time series into three fundamental components that can be added together to create the original data:
A seasonal component A long-term trend component A random component When using historical data to anticipate what might happen in the future, we often wish to separates the seasonal and random components to see the long term historical trends.
The stl() function performs this decomposition.
decomposition <- stl(ts_ts(changi_ts$mean_temperature), s.window=365, t.window = 14001)
summary(decomposition) Call:
stl(x = ts_ts(changi_ts$mean_temperature), s.window = 365, t.window = 14001)
Time.series components:
seasonal trend remainder
Min. :-1.3289742 Min. :27.17374 Min. :-4.145266
1st Qu.:-0.5143364 1st Qu.:27.47242 1st Qu.:-0.606672
Median : 0.1954581 Median :27.74174 Median : 0.089437
Mean :-0.0006473 Mean :27.69026 Mean : 0.009567
3rd Qu.: 0.4367594 3rd Qu.:27.91279 3rd Qu.: 0.703569
Max. : 1.1109584 Max. :28.07372 Max. : 2.905331
IQR:
STL.seasonal STL.trend STL.remainder data
0.9511 0.4404 1.3102 1.7000
% 55.9 25.9 77.1 100.0
Weights: all == 1
Other components: List of 5
$ win : Named num [1:3] 365 14001 365
$ deg : Named int [1:3] 0 1 1
$ jump : Named num [1:3] 37 1401 37
$ inner: int 2
$ outer: int 0
decomposition %>%
autoplot()
in this example, the trend indicates an increase of around 0.4 degrees celsius in daily maximum temperatures over the period 1983 to 2023. We also see there is some seasonality in (dry to wet seasons) of around 2.5 degrees celsius and random daily variation of around 8 degrees.
Note: try changing the t.window and see how this would affect!
decomposition_max <- stl(ts_ts(changi_ts$maximum_temperature), s.window=365, t.window = 14001)
summary(decomposition_max) Call:
stl(x = ts_ts(changi_ts$maximum_temperature), s.window = 365,
t.window = 14001)
Time.series components:
seasonal trend remainder
Min. :-1.7090322 Min. :31.34394 Min. :-7.175268
1st Qu.:-0.3690903 1st Qu.:31.45873 1st Qu.:-0.687818
Median : 0.0233567 Median :31.54728 Median : 0.249546
Mean :-0.0007481 Mean :31.53549 Mean : 0.002352
3rd Qu.: 0.5299052 3rd Qu.:31.61432 3rd Qu.: 0.952924
Max. : 1.2867955 Max. :31.69634 Max. : 4.540526
IQR:
STL.seasonal STL.trend STL.remainder data
0.8990 0.1556 1.6407 1.7000
% 52.9 9.2 96.5 100.0
Weights: all == 1
Other components: List of 5
$ win : Named num [1:3] 365 14001 365
$ deg : Named int [1:3] 0 1 1
$ jump : Named num [1:3] 37 1401 37
$ inner: int 2
$ outer: int 0
decomposition_max %>%
autoplot()
decomposition_min <- stl(ts_ts(changi_ts$minimum_temperature), s.window=365, t.window = 14001)
summary(decomposition_min) Call:
stl(x = ts_ts(changi_ts$minimum_temperature), s.window = 365,
t.window = 14001)
Time.series components:
seasonal trend remainder
Min. :-0.9994999 Min. :24.39684 Min. :-4.653954
1st Qu.:-0.4866771 1st Qu.:24.67177 1st Qu.:-0.696582
Median : 0.0508366 Median :24.94177 Median : 0.055812
Mean :-0.0004214 Mean :24.95344 Mean :-0.001433
3rd Qu.: 0.4047768 3rd Qu.:25.23570 3rd Qu.: 0.771620
Max. : 1.1033537 Max. :25.53458 Max. : 3.964983
IQR:
STL.seasonal STL.trend STL.remainder data
0.8915 0.5639 1.4682 1.8000
% 49.5 31.3 81.6 100.0
Weights: all == 1
Other components: List of 5
$ win : Named num [1:3] 365 14001 365
$ deg : Named int [1:3] 0 1 1
$ jump : Named num [1:3] 37 1401 37
$ inner: int 2
$ outer: int 0
decomposition_min %>%
autoplot()
3.5.4 Aggregating values by months
Although having daily weather observations is extremely useful for analysis, direct visualization of daily observations relies on inexact (and occasionally deceptive) visual identification of patterns and trends. Visualization of decomposition captures trends.
One common technique to address this issue with time series data is to aggregate values by months or years.
With xts time series objects, the period.apply() function can be used to perform operations on blocks of observations across the time series. Those operations are specified with the FUN= parameter and commonly include min(), max(), and mean(). period.apply() requires a regular (even) interval between periods. Because months are uneven, a seq() of regularly spaced index values is passed to the INDEX= parameter. The INDEX value is is a regular spacing that approximates 12 months per year when leap years are considered (365 + 365 + 365 + 366 / 48 = 30.4375). Because the periods need an even number of observations, the window() dates should be chosen so they encompass even multiples of four years to account for leap years.
monthly_avgmeantemp <- period.apply(changi_ts$mean_temperature, INDEX = seq(1, nrow(changi_ts) - 1, 30.4375), FUN = colMeans)
plot(ts_ts(monthly_avgmeantemp), col="darkred", ylim=c(20, 40),
lwd=3, bty="n", las=1, fg=NA, ylab="TMAX (C)")
grid(nx=NA, ny=NULL, lty=1)
4 Building Models
# create samples
training <- ts_ts(window(monthly_avgmeantemp, start = as.Date('1983-01-01'), end = as.Date('2015-12-31')))
validation <- ts_ts(window(monthly_avgmeantemp, start = as.Date('2016-01-01')))4.1 Naive method
naive_model <- naive(training, h = length(validation))
MAPE(naive_model$mean, validation) * 100[1] 2.163031
summary(naive_model)
Forecast method: Naive method
Model Information:
Call: naive(y = training, h = length(validation))
Residual sd: 0.6067
Error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.00364557 0.606687 0.4865316 -0.01086379 1.765102 0.9689419
ACF1
Training set -0.01453341
Forecasts:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Jan 2016 27.94 27.16250 28.71750 26.75092 29.12908
Feb 2016 27.94 26.84045 29.03955 26.25838 29.62162
Mar 2016 27.94 26.59333 29.28667 25.88044 29.99956
Apr 2016 27.94 26.38500 29.49500 25.56183 30.31817
May 2016 27.94 26.20146 29.67854 25.28113 30.59887
Jun 2016 27.94 26.03552 29.84448 25.02735 30.85265
Jul 2016 27.94 25.88293 29.99707 24.79398 31.08602
Aug 2016 27.94 25.74090 30.13910 24.57676 31.30324
Sep 2016 27.94 25.60750 30.27250 24.37275 31.50725
Oct 2016 27.94 25.48133 30.39867 24.17978 31.70022
Nov 2016 27.94 25.36132 30.51868 23.99625 31.88375
Dec 2016 27.94 25.24666 30.63334 23.82089 32.05911
Jan 2017 27.94 25.13668 30.74332 23.65269 32.22731
Feb 2017 27.94 25.03086 30.84914 23.49085 32.38915
Mar 2017 27.94 24.92875 30.95125 23.33469 32.54531
Apr 2017 27.94 24.83000 31.05000 23.18366 32.69634
May 2017 27.94 24.73428 31.14572 23.03728 32.84272
Jun 2017 27.94 24.64134 31.23866 22.89514 32.98486
Jul 2017 27.94 24.55095 31.32905 22.75690 33.12310
Aug 2017 27.94 24.46291 31.41709 22.62225 33.25775
Sep 2017 27.94 24.37704 31.50296 22.49093 33.38907
Oct 2017 27.94 24.29320 31.58680 22.36270 33.51730
Nov 2017 27.94 24.21124 31.66876 22.23735 33.64265
Dec 2017 27.94 24.13104 31.74896 22.11470 33.76530
Jan 2018 27.94 24.05250 31.82750 21.99458 33.88542
Feb 2018 27.94 23.97551 31.90449 21.87683 34.00317
Mar 2018 27.94 23.89999 31.98001 21.76133 34.11867
Apr 2018 27.94 23.82585 32.05415 21.64796 34.23204
May 2018 27.94 23.75303 32.12697 21.53658 34.34342
Jun 2018 27.94 23.68145 32.19855 21.42711 34.45289
Jul 2018 27.94 23.61106 32.26894 21.31946 34.56054
Aug 2018 27.94 23.54179 32.33821 21.21352 34.66648
Sep 2018 27.94 23.47360 32.40640 21.10923 34.77077
Oct 2018 27.94 23.40643 32.47357 21.00650 34.87350
Nov 2018 27.94 23.34024 32.53976 20.90528 34.97472
Dec 2018 27.94 23.27500 32.60500 20.80549 35.07451
Jan 2019 27.94 23.21065 32.66935 20.70708 35.17292
Feb 2019 27.94 23.14716 32.73284 20.60999 35.27001
Mar 2019 27.94 23.08451 32.79549 20.51417 35.36583
Apr 2019 27.94 23.02265 32.85735 20.41957 35.46043
May 2019 27.94 22.96157 32.91843 20.32614 35.55386
Jun 2019 27.94 22.90122 32.97878 20.23385 35.64615
Jul 2019 27.94 22.84159 33.03841 20.14265 35.73735
Aug 2019 27.94 22.78264 33.09736 20.05250 35.82750
Sep 2019 27.94 22.72437 33.15563 19.96338 35.91662
Oct 2019 27.94 22.66673 33.21327 19.87524 36.00476
Nov 2019 27.94 22.60972 33.27028 19.78805 36.09195
Dec 2019 27.94 22.55332 33.32668 19.70178 36.17822
Jan 2020 27.94 22.49750 33.38250 19.61641 36.26359
Feb 2020 27.94 22.44224 33.43776 19.53190 36.34810
Mar 2020 27.94 22.38753 33.49247 19.44824 36.43176
Apr 2020 27.94 22.33336 33.54664 19.36539 36.51461
May 2020 27.94 22.27971 33.60029 19.28333 36.59667
Jun 2020 27.94 22.22656 33.65344 19.20205 36.67795
Jul 2020 27.94 22.17390 33.70610 19.12151 36.75849
Aug 2020 27.94 22.12172 33.75828 19.04170 36.83830
Sep 2020 27.94 22.07000 33.81000 18.96261 36.91739
Oct 2020 27.94 22.01873 33.86127 18.88420 36.99580
Nov 2020 27.94 21.96790 33.91210 18.80647 37.07353
Dec 2020 27.94 21.91751 33.96249 18.72939 37.15061
Jan 2021 27.94 21.86753 34.01247 18.65295 37.22705
Feb 2021 27.94 21.81795 34.06205 18.57714 37.30286
Mar 2021 27.94 21.76878 34.11122 18.50193 37.37807
Apr 2021 27.94 21.71999 34.16001 18.42732 37.45268
May 2021 27.94 21.67159 34.20841 18.35329 37.52671
Jun 2021 27.94 21.62355 34.25645 18.27983 37.60017
Jul 2021 27.94 21.57588 34.30412 18.20692 37.67308
Aug 2021 27.94 21.52856 34.35144 18.13456 37.74544
Sep 2021 27.94 21.48159 34.39841 18.06272 37.81728
Oct 2021 27.94 21.43496 34.44504 17.99140 37.88860
Nov 2021 27.94 21.38866 34.49134 17.92059 37.95941
Dec 2021 27.94 21.34269 34.53731 17.85028 38.02972
Jan 2022 27.94 21.29703 34.58297 17.78046 38.09954
Feb 2022 27.94 21.25169 34.62831 17.71111 38.16889
Mar 2022 27.94 21.20665 34.67335 17.64222 38.23778
Apr 2022 27.94 21.16191 34.71809 17.57380 38.30620
May 2022 27.94 21.11746 34.76254 17.50582 38.37418
Jun 2022 27.94 21.07330 34.80670 17.43829 38.44171
Jul 2022 27.94 21.02942 34.85058 17.37118 38.50882
Aug 2022 27.94 20.98582 34.89418 17.30450 38.57550
Sep 2022 27.94 20.94249 34.93751 17.23824 38.64176
Oct 2022 27.94 20.89943 34.98057 17.17238 38.70762
Nov 2022 27.94 20.85663 35.02337 17.10692 38.77308
Dec 2022 27.94 20.81409 35.06591 17.04186 38.83814
Jan 2023 27.94 20.77180 35.10820 16.97718 38.90282
Feb 2023 27.94 20.72976 35.15024 16.91288 38.96712
Mar 2023 27.94 20.68796 35.19204 16.84896 39.03104
Apr 2023 27.94 20.64640 35.23360 16.78540 39.09460
May 2023 27.94 20.60507 35.27493 16.72220 39.15780
Jun 2023 27.94 20.56398 35.31602 16.65935 39.22065
Jul 2023 27.94 20.52312 35.35688 16.59685 39.28315
Aug 2023 27.94 20.48248 35.39752 16.53470 39.34530
Sep 2023 27.94 20.44205 35.43795 16.47288 39.40712
Oct 2023 27.94 20.40185 35.47815 16.41140 39.46860
Nov 2023 27.94 20.36186 35.51814 16.35024 39.52976
Dec 2023 27.94 20.32208 35.55792 16.28940 39.59060
Jan 2024 27.94 20.28251 35.59749 16.22887 39.65113
4.2 Seasonal Naive method
snaive_model <- snaive(training, h = length(validation))
MAPE(snaive_model$mean, validation) * 100[1] 1.719337
summary(snaive_model)
Forecast method: Seasonal naive method
Model Information:
Call: snaive(y = training, h = length(validation))
Residual sd: 0.6436
Error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.0140191 0.6435697 0.5021267 0.02344714 1.82024 1 0.5350252
Forecasts:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Jan 2016 27.08000 26.25523 27.90477 25.81863 28.34137
Feb 2016 26.89667 26.07190 27.72143 25.63529 28.15804
Mar 2016 27.06667 26.24190 27.89143 25.80529 28.32804
Apr 2016 28.10667 27.28190 28.93143 26.84529 29.36804
May 2016 28.65333 27.82857 29.47810 27.39196 29.91471
Jun 2016 28.81667 27.99190 29.64143 27.55529 30.07804
Jul 2016 28.95333 28.12857 29.77810 27.69196 30.21471
Aug 2016 28.98667 28.16190 29.81143 27.72529 30.24804
Sep 2016 28.47667 27.65190 29.30143 27.21529 29.73804
Oct 2016 28.66000 27.83523 29.48477 27.39863 29.92137
Nov 2016 28.67333 27.84857 29.49810 27.41196 29.93471
Dec 2016 27.94000 27.11523 28.76477 26.67863 29.20137
Jan 2017 27.08000 25.91360 28.24640 25.29615 28.86385
Feb 2017 26.89667 25.73027 28.06306 25.11282 28.68052
Mar 2017 27.06667 25.90027 28.23306 25.28282 28.85052
Apr 2017 28.10667 26.94027 29.27306 26.32282 29.89052
May 2017 28.65333 27.48694 29.81973 26.86948 30.43718
Jun 2017 28.81667 27.65027 29.98306 27.03282 30.60052
Jul 2017 28.95333 27.78694 30.11973 27.16948 30.73718
Aug 2017 28.98667 27.82027 30.15306 27.20282 30.77052
Sep 2017 28.47667 27.31027 29.64306 26.69282 30.26052
Oct 2017 28.66000 27.49360 29.82640 26.87615 30.44385
Nov 2017 28.67333 27.50694 29.83973 26.88948 30.45718
Dec 2017 27.94000 26.77360 29.10640 26.15615 29.72385
Jan 2018 27.08000 25.65146 28.50854 24.89524 29.26476
Feb 2018 26.89667 25.46813 28.32521 24.71190 29.08143
Mar 2018 27.06667 25.63813 28.49521 24.88190 29.25143
Apr 2018 28.10667 26.67813 29.53521 25.92190 30.29143
May 2018 28.65333 27.22479 30.08187 26.46857 30.83810
Jun 2018 28.81667 27.38813 30.24521 26.63190 31.00143
Jul 2018 28.95333 27.52479 30.38187 26.76857 31.13810
Aug 2018 28.98667 27.55813 30.41521 26.80190 31.17143
Sep 2018 28.47667 27.04813 29.90521 26.29190 30.66143
Oct 2018 28.66000 27.23146 30.08854 26.47524 30.84476
Nov 2018 28.67333 27.24479 30.10187 26.48857 30.85810
Dec 2018 27.94000 26.51146 29.36854 25.75524 30.12476
Jan 2019 27.08000 25.43046 28.72954 24.55725 29.60275
Feb 2019 26.89667 25.24713 28.54620 24.37392 29.41941
Mar 2019 27.06667 25.41713 28.71620 24.54392 29.58941
Apr 2019 28.10667 26.45713 29.75620 25.58392 30.62941
May 2019 28.65333 27.00380 30.30287 26.13059 31.17608
Jun 2019 28.81667 27.16713 30.46620 26.29392 31.33941
Jul 2019 28.95333 27.30380 30.60287 26.43059 31.47608
Aug 2019 28.98667 27.33713 30.63620 26.46392 31.50941
Sep 2019 28.47667 26.82713 30.12620 25.95392 30.99941
Oct 2019 28.66000 27.01046 30.30954 26.13725 31.18275
Nov 2019 28.67333 27.02380 30.32287 26.15059 31.19608
Dec 2019 27.94000 26.29046 29.58954 25.41725 30.46275
Jan 2020 27.08000 25.23576 28.92424 24.25948 29.90052
Feb 2020 26.89667 25.05243 28.74090 24.07615 29.71718
Mar 2020 27.06667 25.22243 28.91090 24.24615 29.88718
Apr 2020 28.10667 26.26243 29.95090 25.28615 30.92718
May 2020 28.65333 26.80910 30.49757 25.83282 31.47385
Jun 2020 28.81667 26.97243 30.66090 25.99615 31.63718
Jul 2020 28.95333 27.10910 30.79757 26.13282 31.77385
Aug 2020 28.98667 27.14243 30.83090 26.16615 31.80718
Sep 2020 28.47667 26.63243 30.32090 25.65615 31.29718
Oct 2020 28.66000 26.81576 30.50424 25.83948 31.48052
Nov 2020 28.67333 26.82910 30.51757 25.85282 31.49385
Dec 2020 27.94000 26.09576 29.78424 25.11948 30.76052
Jan 2021 27.08000 25.05974 29.10026 23.99028 30.16972
Feb 2021 26.89667 24.87641 28.91693 23.80695 29.98639
Mar 2021 27.06667 25.04641 29.08693 23.97695 30.15639
Apr 2021 28.10667 26.08641 30.12693 25.01695 31.19639
May 2021 28.65333 26.63307 30.67359 25.56361 31.74305
Jun 2021 28.81667 26.79641 30.83693 25.72695 31.90639
Jul 2021 28.95333 26.93307 30.97359 25.86361 32.04305
Aug 2021 28.98667 26.96641 31.00693 25.89695 32.07639
Sep 2021 28.47667 26.45641 30.49693 25.38695 31.56639
Oct 2021 28.66000 26.63974 30.68026 25.57028 31.74972
Nov 2021 28.67333 26.65307 30.69359 25.58361 31.76305
Dec 2021 27.94000 25.91974 29.96026 24.85028 31.02972
Jan 2022 27.08000 24.89787 29.26213 23.74272 30.41728
Feb 2022 26.89667 24.71454 29.07880 23.55939 30.23395
Mar 2022 27.06667 24.88454 29.24880 23.72939 30.40395
Apr 2022 28.10667 25.92454 30.28880 24.76939 31.44395
May 2022 28.65333 26.47120 30.83546 25.31605 31.99061
Jun 2022 28.81667 26.63454 30.99880 25.47939 32.15395
Jul 2022 28.95333 26.77120 31.13546 25.61605 32.29061
Aug 2022 28.98667 26.80454 31.16880 25.64939 32.32395
Sep 2022 28.47667 26.29454 30.65880 25.13939 31.81395
Oct 2022 28.66000 26.47787 30.84213 25.32272 31.99728
Nov 2022 28.67333 26.49120 30.85546 25.33605 32.01061
Dec 2022 27.94000 25.75787 30.12213 24.60272 31.27728
Jan 2023 27.08000 24.74720 29.41280 23.51230 30.64770
Feb 2023 26.89667 24.56387 29.22946 23.32896 30.46437
Mar 2023 27.06667 24.73387 29.39946 23.49896 30.63437
Apr 2023 28.10667 25.77387 30.43946 24.53896 31.67437
May 2023 28.65333 26.32054 30.98613 25.08563 32.22104
Jun 2023 28.81667 26.48387 31.14946 25.24896 32.38437
Jul 2023 28.95333 26.62054 31.28613 25.38563 32.52104
Aug 2023 28.98667 26.65387 31.31946 25.41896 32.55437
Sep 2023 28.47667 26.14387 30.80946 24.90896 32.04437
Oct 2023 28.66000 26.32720 30.99280 25.09230 32.22770
Nov 2023 28.67333 26.34054 31.00613 25.10563 32.24104
Dec 2023 27.94000 25.60720 30.27280 24.37230 31.50770
Jan 2024 27.08000 24.60570 29.55430 23.29588 30.86412
4.3 State Space Models
ets_modelT <- ets(training, allow.multiplicative.trend = TRUE)
summary(ets_modelT)ETS(A,N,A)
Call:
ets(y = training, allow.multiplicative.trend = TRUE)
Smoothing parameters:
alpha = 0.4445
gamma = 1e-04
Initial states:
l = 27.6201
s = -0.5352 0.0884 0.155 0.28 0.3454 0.7757
0.8227 0.4922 0.0699 -0.4203 -1.0135 -1.0602
sigma: 0.4191
AIC AICc BIC
1695.638 1696.901 1755.359
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.004836528 0.4116305 0.3275042 -0.0002696186 1.186142 0.6522342
ACF1
Training set 0.001489326
Multiplicative errors No trend Addictive seasonnality
ets_forecastT <- forecast(ets_modelT, h=length(validation))
MAPE(ets_forecastT$mean, validation) *100[1] 1.894772
ets_forecastT Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Jan 2016 27.41122 26.87412 27.94833 26.58979 28.23265
Feb 2016 27.45798 26.87020 28.04575 26.55905 28.35691
Mar 2016 28.05119 27.41678 28.68561 27.08094 29.02145
Apr 2016 28.54114 27.86329 29.21899 27.50446 29.57783
May 2016 28.96364 28.24497 29.68231 27.86453 30.06275
Jun 2016 29.29409 28.53680 30.05138 28.13592 30.45226
Jul 2016 29.24717 28.45314 30.04120 28.03281 30.46154
Aug 2016 28.81692 27.98777 29.64607 27.54885 30.08499
Sep 2016 28.75134 27.88851 29.61418 27.43175 30.07094
Oct 2016 28.62637 27.73111 29.52163 27.25719 29.99555
Nov 2016 28.55976 27.63322 29.48631 27.14273 29.97679
Dec 2016 27.93617 26.97935 28.89299 26.47283 29.39950
Jan 2017 27.41122 26.42506 28.39738 25.90302 28.91942
Feb 2017 27.45798 26.44333 28.47263 25.90621 29.00975
Mar 2017 28.05119 27.00883 29.09355 26.45704 29.64534
Apr 2017 28.54114 27.47179 29.61049 26.90571 30.17657
May 2017 28.96364 27.86796 30.05932 27.28794 30.63933
Jun 2017 29.29409 28.17270 30.41548 27.57907 31.00911
Jul 2017 29.24717 28.10065 30.39369 27.49372 31.00063
Aug 2017 28.81692 27.64580 29.98803 27.02585 30.60799
Sep 2017 28.75134 27.55614 29.94655 26.92344 30.57925
Oct 2017 28.62637 27.40755 29.84519 26.76235 30.49039
Nov 2017 28.55976 27.31778 29.80174 26.66032 30.45921
Dec 2017 27.93617 26.67144 29.20090 26.00193 29.87041
Jan 2018 27.41122 26.12415 28.69829 25.44282 29.37962
Feb 2018 27.45798 26.14895 28.76700 25.45600 29.45996
Mar 2018 28.05119 26.72058 29.38181 26.01619 30.08620
Apr 2018 28.54114 27.18927 29.89301 26.47364 30.60864
May 2018 28.96364 27.59085 30.33642 26.86414 31.06313
Jun 2018 29.29409 27.90070 30.68748 27.16308 31.42510
Jul 2018 29.24717 27.83347 30.66087 27.08511 31.40924
Aug 2018 28.81692 27.38320 30.25064 26.62424 31.00960
Sep 2018 28.75134 27.29788 30.20480 26.52847 30.97422
Oct 2018 28.62637 27.15343 30.09931 26.37371 30.87903
Nov 2018 28.55976 27.06760 30.05192 26.27770 30.84182
Dec 2018 27.93617 26.42502 29.44732 25.62507 30.24727
Jan 2019 27.41122 25.88133 28.94111 25.07145 29.75099
Feb 2019 27.45798 25.90957 29.00639 25.08989 29.82606
Mar 2019 28.05119 26.48449 29.61790 25.65512 30.44726
Apr 2019 28.54114 26.95635 30.12593 26.11741 30.96487
May 2019 28.96364 27.36096 30.56631 26.51256 31.41472
Jun 2019 29.29409 27.67373 30.91445 26.81596 31.77222
Jul 2019 29.24717 27.60931 30.88503 26.74229 31.75206
Aug 2019 28.81692 27.16175 30.47208 26.28556 31.34828
Sep 2019 28.75134 27.07905 30.42364 26.19379 31.30890
Oct 2019 28.62637 26.93712 30.31562 26.04288 31.20986
Nov 2019 28.55976 26.85372 30.26580 25.95060 31.16892
Dec 2019 27.93617 26.21350 29.65884 25.30157 30.57077
Jan 2020 27.41122 25.67208 29.15036 24.75144 30.07100
Feb 2020 27.45798 25.70253 29.21343 24.77325 30.14270
Mar 2020 28.05119 26.27958 29.82280 25.34175 30.76064
Apr 2020 28.54114 26.75352 30.32877 25.80720 31.27508
May 2020 28.96364 27.16014 30.76714 26.20543 31.72185
Jun 2020 29.29409 27.47486 31.11332 26.51181 32.07636
Jul 2020 29.24717 27.41234 31.08200 26.44104 32.05330
Aug 2020 28.81692 26.96662 30.66722 25.98713 31.64671
Sep 2020 28.75134 26.88571 30.61698 25.89810 31.60459
Oct 2020 28.62637 26.74552 30.50722 25.74986 31.50289
Nov 2020 28.55976 26.66382 30.45570 25.66016 31.45936
Dec 2020 27.93617 26.02525 29.84709 25.01366 30.85868
Jan 2021 27.41122 25.48544 29.33700 24.46599 30.35645
Feb 2021 27.45798 25.51745 29.39850 24.49020 30.42575
Mar 2021 28.05119 26.09604 30.00635 25.06104 31.04134
Apr 2021 28.54114 26.57146 30.51082 25.52878 31.55350
May 2021 28.96364 26.97954 30.94773 25.92923 31.99805
Jun 2021 29.29409 27.29568 31.29250 26.23779 32.35039
Jul 2021 29.24717 27.23455 31.25979 26.16914 32.32520
Aug 2021 28.81692 26.79019 30.84365 25.71730 31.91653
Sep 2021 28.75134 26.71060 30.79209 25.63030 31.87239
Oct 2021 28.62637 26.57171 30.68103 25.48404 31.76871
Nov 2021 28.55976 26.49128 30.62825 25.39628 31.72324
Dec 2021 27.93617 25.85394 30.01839 24.75168 31.12066
Jan 2022 27.41122 25.31535 29.50709 24.20587 30.61657
Feb 2022 27.45798 25.34855 29.56740 24.23189 30.68406
Mar 2022 28.05119 25.92830 30.17408 24.80451 31.29787
Apr 2022 28.54114 26.40487 30.67741 25.27399 31.80829
May 2022 28.96364 26.81407 31.11321 25.67615 32.25113
Jun 2022 29.29409 27.13130 31.45688 25.98639 32.60179
Jul 2022 29.24717 27.07124 31.42310 25.91938 32.57497
Aug 2022 28.81692 26.62793 31.00591 25.46915 32.16468
Sep 2022 28.75134 26.54938 30.95331 25.38372 32.11896
Oct 2022 28.62637 26.41150 30.84124 25.23902 32.01373
Nov 2022 28.55976 26.33206 30.78746 25.15278 31.96674
Dec 2022 27.93617 25.69570 30.17664 24.50967 31.36267
Jan 2023 27.41122 25.15807 29.66437 23.96532 30.85712
Feb 2023 27.45798 25.19221 29.72374 23.99279 30.92317
Mar 2023 28.05119 25.77288 30.32950 24.56682 31.53556
Apr 2023 28.54114 26.25036 30.83192 25.03769 32.04459
May 2023 28.96364 26.66045 31.26683 25.44121 32.48607
Jun 2023 29.29409 26.97856 31.60962 25.75279 32.83539
Jul 2023 29.24717 26.91936 31.57498 25.68710 32.80725
Aug 2023 28.81692 26.47690 31.15694 25.23817 32.39567
Sep 2023 28.75134 26.39918 31.10351 25.15401 32.34867
Oct 2023 28.62637 26.26212 30.99062 25.01056 32.24218
Nov 2023 28.55976 26.18348 30.93604 24.92556 32.19396
Dec 2023 27.93617 25.54792 30.32442 24.28366 31.58868
Jan 2024 27.41122 25.01107 29.81137 23.74051 31.08193
plot(monthly_avgmeantemp, col="blue", xlab="Year", ylab="Daily Max Temp", main="ETS Forecast", type='l')
lines(ets_forecastT$mean, col="red", lwd=2)
4.4 Holt-Winters
hw_model <- hw(training, h = length(validation))
MAPE(hw_model$mean, validation)*100[1] 1.829896
4.5 ARIMA
arima_optimal <- auto.arima(training)
summary(arima_optimal)Series: training
ARIMA(0,0,1)(2,1,0)[12] with drift
Coefficients:
ma1 sar1 sar2 drift
0.4103 -0.5036 -0.3495 0.0018
s.e. 0.0402 0.0498 0.0500 0.0017
sigma^2 = 0.2527: log likelihood = -281.3
AIC=572.6 AICc=572.76 BIC=592.36
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.009596971 0.4924452 0.386962 -0.05926227 1.401805 0.7706462
ACF1
Training set 0.1099722
sarima_forecast <- sarima.for(training, n.ahead=length(validation),
p=0,d=0,q=1,P=2,D=1,Q=0,S=12)
sarima_forecast$pred
Jan Feb Mar Apr May Jun Jul Aug
2016 27.06569 26.87211 26.98105 28.19028 28.47709 28.57984 29.03199 28.54374
2017 26.90637 26.68068 27.13374 28.13427 28.46456 28.63742 29.06235 28.71326
2018 27.03127 26.82533 27.12645 28.17293 28.57214 28.73087 29.05924 28.82237
2019 27.06373 26.85907 27.11643 28.21271 28.56202 28.70336 29.08987 28.74785
2020 27.04341 26.83120 27.16370 28.21884 28.56919 28.72423 29.11521 28.78692
2021 27.08198 26.87312 27.18307 28.24152 28.60879 28.76301 29.13142 28.83296
2022 27.10933 26.90142 27.19647 28.26763 28.62602 28.77586 29.15408 28.83580
2023 27.12175 26.91219 27.22263 28.28623 28.64318 28.79551 29.17668 28.85795
2024 27.14561
Sep Oct Nov Dec
2016 28.19265 28.29693 28.31795 27.62436
2017 28.07712 28.46468 28.43057 27.64590
2018 28.27423 28.54677 28.53774 27.78504
2019 28.25502 28.48648 28.48409 27.74712
2020 28.23548 28.52782 28.51333 27.75726
2021 28.29171 28.56775 28.55703 27.80508
2022 28.30990 28.57287 28.56448 27.81713
2023 28.32076 28.59601 28.58513 27.83403
2024
$se
Jan Feb Mar Apr May Jun Jul
2016 0.5000565 0.5405175 0.5405175 0.5405175 0.5405175 0.5405175 0.5405175
2017 0.5948006 0.6034603 0.6034603 0.6034603 0.6034603 0.6034603 0.6034603
2018 0.6358294 0.6411187 0.6411187 0.6411187 0.6411187 0.6411187 0.6411187
2019 0.7131990 0.7246305 0.7246305 0.7246305 0.7246305 0.7246305 0.7246305
2020 0.7742545 0.7823003 0.7823003 0.7823003 0.7823003 0.7823003 0.7823003
2021 0.8223570 0.8289110 0.8289110 0.8289110 0.8289110 0.8289110 0.8289110
2022 0.8739936 0.8813574 0.8813574 0.8813574 0.8813574 0.8813574 0.8813574
2023 0.9223536 0.9290784 0.9290784 0.9290784 0.9290784 0.9290784 0.9290784
2024 0.9664874
Aug Sep Oct Nov Dec
2016 0.5405175 0.5405175 0.5405175 0.5405175 0.5405175
2017 0.6034603 0.6034603 0.6034603 0.6034603 0.6034603
2018 0.6411187 0.6411187 0.6411187 0.6411187 0.6411187
2019 0.7246305 0.7246305 0.7246305 0.7246305 0.7246305
2020 0.7823003 0.7823003 0.7823003 0.7823003 0.7823003
2021 0.8289110 0.8289110 0.8289110 0.8289110 0.8289110
2022 0.8813574 0.8813574 0.8813574 0.8813574 0.8813574
2023 0.9290784 0.9290784 0.9290784 0.9290784 0.9290784
2024
MAPE(sarima_forecast$pred, validation)*100[1] 1.616764
5 Rainfall Data
5.1 Selecting the relevant columns for Temperature Data
rain <- data %>%
select(tdate, station, daily_rainfall_total)
glimpse(rain)Rows: 329,156
Columns: 3
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01-04, …
$ station <chr> "Macritchie Reservoir", "Macritchie Reservoir", "…
$ daily_rainfall_total <dbl> 0.0, 0.0, 0.0, 0.0, 22.6, 49.6, 2.4, 0.0, 0.0, 0.…
5.2 Checking for missing values
summary(rain) tdate station daily_rainfall_total
Min. :1980-01-01 Length:329156 Min. : 0.000
1st Qu.:1997-04-29 Class :character 1st Qu.: 0.000
Median :2011-09-18 Mode :character Median : 0.200
Mean :2007-07-02 Mean : 6.822
3rd Qu.:2017-11-27 3rd Qu.: 6.500
Max. :2023-12-31 Max. :278.600
NA's :58 NA's :5136
rain <- rain %>%
drop_na(c(tdate, station))
summary(rain) tdate station daily_rainfall_total
Min. :1980-01-01 Length:329098 Min. : 0.000
1st Qu.:1997-04-29 Class :character 1st Qu.: 0.000
Median :2011-09-18 Mode :character Median : 0.200
Mean :2007-07-02 Mean : 6.822
3rd Qu.:2017-11-27 3rd Qu.: 6.500
Max. :2023-12-31 Max. :278.600
NA's :5078
unique(rain$station) [1] "Macritchie Reservoir" "Lower Peirce Reservoir"
[3] "Admiralty" "East Coast Parkway"
[5] "Ang Mo Kio" "Newton"
[7] "Lim Chu Kang" "Marine Parade"
[9] "Choa Chu Kang (Central)" "Tuas South"
[11] "Pasir Panjang" "Jurong Island"
[13] "Nicoll Highway" "Botanic Garden"
[15] "Choa Chu Kang (South)" "Whampoa"
[17] "Changi" "Jurong Pier"
[19] "Ulu Pandan" "Mandai"
[21] "Tai Seng" "Jurong (West)"
[23] "Clementi" "Sentosa Island"
[25] "Bukit Panjang" "Kranji Reservoir"
[27] "Upper Peirce Reservoir" "Kent Ridge"
[29] "Queenstown" "Tanjong Katong"
[31] "Somerset (Road)" "Punggol"
[33] "Simei" "Toa Payoh"
[35] "Tuas" "Bukit Timah"
[37] "Pasir Ris (Central)"
5.3 Further exploration of total rainfall using plotly
rain_wide <- rain %>%
pivot_wider(names_from = station, values_from = daily_rainfall_total)
glimpse(rain_wide)Rows: 16,071
Columns: 38
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01…
$ `Macritchie Reservoir` <dbl> 0.0, 0.0, 0.0, 0.0, 22.6, 49.6, 2.4, 0.0, 0.…
$ `Lower Peirce Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Admiralty <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `East Coast Parkway` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Ang Mo Kio` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Newton <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Lim Chu Kang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Marine Parade` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Choa Chu Kang (Central)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Tuas South` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Pasir Panjang` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Jurong Island` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Nicoll Highway` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Botanic Garden` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Choa Chu Kang (South)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Whampoa <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Changi <dbl> 0.0, 0.0, 0.0, 0.0, 8.0, 9.1, 7.9, 0.0, 0.0,…
$ `Jurong Pier` <dbl> 0.0, 29.2, 0.1, 0.0, 11.1, 27.6, 0.8, 0.0, 0…
$ `Ulu Pandan` <dbl> 0.0, 0.9, 0.0, 0.0, 18.8, 17.0, 5.4, 0.0, 0.…
$ Mandai <dbl> 0.0, 0.0, 0.0, 0.2, 15.8, 40.4, 2.5, 0.0, 0.…
$ `Tai Seng` <dbl> 0.0, 0.0, 0.0, 0.0, 26.1, 49.7, 4.9, 0.0, 0.…
$ `Jurong (West)` <dbl> 0.0, 0.4, 0.0, 0.0, 2.9, 20.9, 0.0, 0.1, 0.0…
$ Clementi <dbl> 0.0, 0.0, 0.0, 0.0, 34.1, NA, NA, NA, NA, NA…
$ `Sentosa Island` <dbl> 0.0, 0.0, 0.0, 0.0, 13.6, 45.5, 0.4, 0.0, 0.…
$ `Bukit Panjang` <dbl> 0.0, 0.0, 0.0, 0.0, 12.8, 28.5, 5.5, 0.0, 0.…
$ `Kranji Reservoir` <dbl> 0.0, 0.0, 0.0, 0.0, 6.4, 18.3, 0.0, 0.0, 0.0…
$ `Upper Peirce Reservoir` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Kent Ridge` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Queenstown <dbl> 0.0, 0.0, 0.0, 0.0, 28.3, 46.0, 1.5, 0.0, 0.…
$ `Tanjong Katong` <dbl> 0.0, 0.0, 0.0, 0.0, 8.7, 30.2, 2.4, 0.0, 0.0…
$ `Somerset (Road)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Punggol <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Simei <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Toa Payoh` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ Tuas <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Bukit Timah` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ `Pasir Ris (Central)` <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
plot_ly(data = rain_wide,
x = ~tdate,
y = ~ Admiralty,
type = "bar") |>
layout(title = "Total Rain Fall observed by Weather Station",
xaxis = list(title = "Date"),
yaxis = list(title = "", range = c(0,300)),
theme_ipsum_rc(plot_title_size = 13, plot_title_margin=4, subtitle_size=11, subtitle_margin=4,
axis_title_size = 8, axis_text_size=8, axis_title_face= "bold", plot_margin = margin(4, 4, 4, 4)),
updatemenus = list(list(type = 'dropdown',
xref = "paper",
yref = "paper",
xanchor = "left",
x = 0.04,
y = 0.95,
buttons = list(
list(method = "update",
args = list(list(y = list(rain_wide$Admiralty)),
list(yaxis = list(title = "Total Rainfall observed in Admiralty", range = c(0,300)))),label = "Admiralty"),
list(method = "update",
args = list(list(y = list(rain_wide$`East Coast Parkway`)),
list(yaxis = list(title = "Total Rainfall observed in East Coast Parkway", range = c(0,300)))),label = "East Coast Parkway"),
list(method = "update",
args = list(list(y = list(rain_wide$`Ang Mo Kio`)),
list(yaxis = list(title = "Total Rainfall observed in Ang Mo Kio", range = c(0,300)))),label = "Ang Mo Kio"),
list(method = "update",
args = list(list(y = list(rain_wide$Newton)),
list(yaxis = list(title = "Total Rainfall observed in Newton", range = c(0,300)))),label = "Newton"),
list(method = "update",
args = list(list(y = list(rain_wide$`Tuas South`)),
list(yaxis = list(title = "Total Rainfall observed in Tuas South", range = c(0,300)))),label = "Tuas South"),
list(method = "update",
args = list(list(y = list(rain_wide$`Pasir Panjang`)),
list(yaxis = list(title = "Total Rainfall observed in Pasir Panjang", range = c(0,300)))),label = "Pasir Panjang"),
list(method = "update",
args = list(list(y = list(rain_wide$`Jurong Island`)),
list(yaxis = list(title = "Total Rainfall observed in Jurong Island", range = c(0,300)))),label = "Jurong Island"),
list(method = "update",
args = list(list(y = list(rain_wide$`Choa Chu Kang (South)`)),
list(yaxis = list(title = "Total Rainfall observed in Choa Chu Kang (South)", range = c(0,300)))),label = "Choa Chu Kang (South)"),
list(method = "update",
args = list(list(y = list(rain_wide$Changi)),
list(yaxis = list(title = "Total Rainfall observed in Changi", range = c(0,300)))),label = "Changi"),
list(method = "update",
args = list(list(y = list(rain_wide$`Tai Seng`)),
list(yaxis = list(title = "Total Rainfall observed in Tai Seng", range = c(0,300)))),label = "Tai Seng"),
list(method = "update",
args = list(list(y = list(rain_wide$`Jurong (West)`)),
list(yaxis = list(title = "Total Rainfall observed in Jurong West", range = c(0,300)))),label = "Jurong West"),
list(method = "update",
args = list(list(y = list(rain_wide$Clementi)),
list(yaxis = list(title = "Total Rainfall observed in Clementi", range = c(0,300)))),label = "Clementi"),
list(method = "update",
args = list(list(y = list(rain_wide$`Sentosa Island`)),
list(yaxis = list(title = "Total Rainfall observed in Sentosa", range = c(0,300)))),label = "Sentosa"),
list(method = "update",
args = list(list(y = list(rain_wide$`Macritchie Reservoir`)),
list(yaxis = list(title = "Total Rainfall observed at Macritchie Reservoir", range = c(0,300)))),label = "Macritchie Reservoir"),
list(method = "update",
args = list(list(y = list(rain_wide$`Lower Peirce Reservoir`)),
list(yaxis = list(title = "Total Rainfall observed at Lower Peirce Reservoir", range = c(0,300)))),label = "Lower Peirce Reservoir"),
list(method = "update",
args = list(list(y = list(rain_wide$`Lim Chu Kang`)),
list(yaxis = list(title = "Total Rainfall observed at Lim Chu Kang", range = c(0,300)))),label = "Lim Chu Kang"),
list(method = "update",
args = list(list(y = list(rain_wide$`Marine Parade`)),
list(yaxis = list(title = "Total Rainfall observed at Marine Parade", range = c(0,300)))),label = "Marine Parade"),
list(method = "update",
args = list(list(y = list(rain_wide$`Choa Chu Kang (Central)`)),
list(yaxis = list(title = "Total Rainfall observed at Choa Chu Kang (Central)", range = c(0,300)))),label = "Choa Chu Kang (Central)"),
list(method = "update",
args = list(list(y = list(rain_wide$`Nicoll Highway`)),
list(yaxis = list(title = "Total Rainfall observed at Nicoll Highway", range = c(0,300)))),label = "Nicoll Highway"),
list(method = "update",
args = list(list(y = list(rain_wide$`Botanic Garden`)),
list(yaxis = list(title = "Total Rainfall observed at Botanic Garden", range = c(0,300)))),label = "Botanic Garden"),
list(method = "update",
args = list(list(y = list(rain_wide$Whampoa)),
list(yaxis = list(title = "Total Rainfall observed at Whampoa", range = c(0,300)))),label = "Whampoa"),
list(method = "update",
args = list(list(y = list(rain_wide$`Jurong Pier`)),
list(yaxis = list(title = "Total Rainfall observed at Jurong Pier", range = c(0,300)))),label = "Jurong Pier"),
list(method = "update",
args = list(list(y = list(rain_wide$`Ulu Pandan`)),
list(yaxis = list(title = "Total Rainfall observed at Ulu Pandan", range = c(0,300)))),label = "Ulu Pandan"),
list(method = "update",
args = list(list(y = list(rain_wide$Mandai)),
list(yaxis = list(title = "Total Rainfall observed at Mandai", range = c(0,300)))),label = "Mandai"),
list(method = "update",
args = list(list(y = list(rain_wide$`Bukit Panjang`)),
list(yaxis = list(title = "Total Rainfall observed at Bukit Panjang", range = c(0,300)))),label = "Bukit Panjang"),
list(method = "update",
args = list(list(y = list(rain_wide$`Kranji Reservoir`)),
list(yaxis = list(title = "Total Rainfall observed at Kranji Reservoir", range = c(0,300)))),label = "Kranji Reservoir"),
list(method = "update",
args = list(list(y = list(rain_wide$`Upper Peirce Reservoir`)),
list(yaxis = list(title = "Total Rainfall observed at Upper Peirce Reservoir", range = c(0,300)))),label = "Upper Peirce Reservoir"),
list(method = "update",
args = list(list(y = list(rain_wide$`Kent Ridge`)),
list(yaxis = list(title = "Total Rainfall observed at Kent Ridge", range = c(0,300)))),label = "Kent Ridge"),
list(method = "update",
args = list(list(y = list(rain_wide$Queenstown)),
list(yaxis = list(title = "Total Rainfall observed at Queenstown", range = c(0,300)))),label = "Queenstown"),
list(method = "update",
args = list(list(y = list(rain_wide$`Tanjong Katong`)),
list(yaxis = list(title = "Total Rainfall observed at Tanjong Katong", range = c(0,300)))),label = "Tanjong Katong"),
list(method = "update",
args = list(list(y = list(rain_wide$`Somerset (Road)`)),
list(yaxis = list(title = "Total Rainfall observed at Somerset (Road)", range = c(0,300)))),label = "Somerset (Road)"),
list(method = "update",
args = list(list(y = list(rain_wide$`Punggol`)),
list(yaxis = list(title = "Total Rainfall observed at Punggol", range = c(0,300)))),label = "Punggol"),
list(method = "update",
args = list(list(y = list(rain_wide$`Simei`)),
list(yaxis = list(title = "Total Rainfall observed at Simei", range = c(0,300)))),label = "Simei"),
list(method = "update",
args = list(list(y = list(rain_wide$`Toa Payoh`)),
list(yaxis = list(title = "Total Rainfall observed at Toa Payoh", range = c(0,300)))),label = "Toa Payoh"),
list(method = "update",
args = list(list(y = list(rain_wide$`Tuas`)),
list(yaxis = list(title = "Total Rainfall observed at Tuas", range = c(0,300)))),label = "Tuas"),
list(method = "update",
args = list(list(y = list(rain_wide$`Bukit Timah`)),
list(yaxis = list(title = "Total Rainfall observed at Bukit Timah", range = c(0,300)))),label = "Bukit Timah"),
list(method = "update",
args = list(list(y = list(rain_wide$`Pasir Ris (Central)`)),
list(yaxis = list(title = "Total Rainfall observed at Pasir Ris (Central)", range = c(0,300)))),label = "Pasir Ris (Central)")
)))) Seems like there are some stations with no rainfall data in all years, while some have rainfall data from certain years onwards. Let’s explore further.
missing.values <- rain_wide %>%
gather(key = "key", value = "val") %>%
mutate(isna = is.na(val)) %>%
group_by(key) %>%
mutate(total = n()) %>%
group_by(key, total, isna) %>%
summarise(num.isna = n()) %>%
mutate(pct = num.isna / total * 100)
levels <-
(missing.values %>% filter(isna == T) %>% arrange(desc(pct)))$key
percentage.plot <- missing.values %>%
ggplot() +
geom_bar(aes(x = reorder(key, desc(pct)),
y = pct, fill=isna),
stat = 'identity', alpha=0.8) +
scale_x_discrete(limits = levels) +
scale_fill_manual(name = "",
values = c('steelblue', 'tomato3'), labels = c("Present", "Missing")) +
coord_flip() +
labs(title = "Percentage of missing values", x =
'Variable', y = "% of missing values")
percentage.plot
5.4 Preparing for time series forecasting
changirf <- rain %>%
filter(station == "Changi") %>%
select(tdate, daily_rainfall_total)
glimpse(changirf)Rows: 16,071
Columns: 2
$ tdate <date> 1980-01-01, 1980-01-02, 1980-01-03, 1980-01-04, …
$ daily_rainfall_total <dbl> 0.0, 0.0, 0.0, 0.0, 8.0, 9.1, 7.9, 0.0, 0.0, 0.0,…
convert df to timeseries data
changirf_ts <- xts(changirf[,"daily_rainfall_total"], order.by=as.Date(changirf$tdate))
changirf_ts <- ts_regular(changirf_ts)
changirf_ts <- na.fill(changirf_ts, "extend")
changirf_ts <- window(changirf_ts, start = as.Date("1983-01-01"), end = as.Date("2023-12-31"))convert this to a time series object ts_regular gives the time series a regular interval by adding NA values for missing dates na.fill function fills those missing dates by extending values from previous days The window() function clips off the starting and ending dates so the number of years covered is a multiple of four. This will be needed later when the data needs to be aggregated into monthly periods.
changirf_ts_mth <- period.apply(changirf_ts$value, INDEX = seq(1, nrow(changirf_ts) - 1, 30.4375), FUN = sum)
plot(ts_ts(changirf_ts_mth), col="darkgreen",
lwd=3, bty="n", las=1, fg=NA, ylab="Monthly Rainfall (mm)")
grid(nx=NA, ny=NULL, lty=1)
decompsition of rainfall
fit <- stl(ts_ts(changirf_ts_mth), s.window=365, t.window = 14001)
plot(fit)
5.5 Building Models
# create samples
trainingrf <- window(changirf_ts_mth, start = as.Date('1983-01-01'), end = as.Date('2015-12-31'))
validationrf <- window(changirf_ts_mth, start = as.Date('2016-01-01'))5.6 Naive method
naive_model <- naive(trainingrf, h = length(validationrf))
summary(naive_model)
Forecast method: Naive method
Model Information:
Call: naive(y = trainingrf, h = length(validationrf))
Residual sd: 148.1979
Error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.255443 148.1979 109.2408 -163.5134 208.4867 1 -0.4617884
Forecasts:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
397 101.2 -88.72328 291.1233 -189.2626 391.6626
398 101.2 -167.39207 369.7921 -309.5761 511.9761
399 101.2 -227.75677 430.1568 -401.8960 604.2960
400 101.2 -278.64656 481.0466 -479.7252 682.1252
401 101.2 -323.48136 525.8814 -548.2941 750.6941
402 101.2 -364.01512 566.4151 -610.2851 812.6851
403 101.2 -401.28976 603.6898 -667.2918 869.6918
404 101.2 -435.98415 638.3841 -720.3523 922.7523
405 101.2 -468.56983 670.9698 -770.1878 972.5878
406 101.2 -499.39014 701.7901 -817.3234 1019.7234
407 101.2 -528.70425 731.1043 -862.1554 1064.5554
408 101.2 -556.71353 759.1135 -904.9919 1107.3919
409 101.2 -583.57812 785.9781 -946.0778 1148.4778
410 101.2 -609.42783 811.8278 -985.6115 1188.0115
411 101.2 -634.36969 836.7697 -1023.7568 1226.1568
412 101.2 -658.49311 860.8931 -1060.6504 1263.0504
413 101.2 -681.87373 884.2737 -1096.4079 1298.8079
414 101.2 -704.57622 906.9762 -1131.1284 1333.5284
415 101.2 -726.65637 929.0564 -1164.8971 1367.2971
416 101.2 -748.16272 950.5627 -1197.7882 1400.1882
417 101.2 -769.13780 971.5378 -1229.8668 1432.2668
418 101.2 -789.61913 992.0191 -1261.1903 1463.5903
419 101.2 -809.64004 1012.0400 -1291.8096 1494.2096
420 101.2 -829.23024 1031.6302 -1321.7703 1524.1703
421 101.2 -848.41639 1050.8164 -1351.1129 1553.5129
422 101.2 -867.22250 1069.6225 -1379.8744 1582.2744
423 101.2 -885.67030 1088.0703 -1408.0879 1610.4879
424 101.2 -903.77952 1106.1795 -1435.7835 1638.1835
425 101.2 -921.56815 1123.9682 -1462.9889 1665.3889
426 101.2 -939.05263 1141.4526 -1489.7291 1692.1291
427 101.2 -956.24806 1158.6481 -1516.0272 1718.4272
428 101.2 -973.16830 1175.5683 -1541.9045 1744.3045
429 101.2 -989.82617 1192.2262 -1567.3805 1769.7805
430 101.2 -1006.23350 1208.6335 -1592.4734 1794.8734
431 101.2 -1022.40126 1224.8013 -1617.1998 1819.5998
432 101.2 -1038.33967 1240.7397 -1641.5755 1843.9755
433 101.2 -1054.05820 1256.4582 -1665.6149 1868.0149
434 101.2 -1069.56571 1271.9657 -1689.3316 1891.7316
435 101.2 -1084.87049 1287.2705 -1712.7383 1915.1383
436 101.2 -1099.98028 1302.3803 -1735.8467 1938.2467
437 101.2 -1114.90234 1317.3023 -1758.6680 1961.0680
438 101.2 -1129.64351 1332.0435 -1781.2127 1983.6127
439 101.2 -1144.21022 1346.6102 -1803.4906 2005.8906
440 101.2 -1158.60850 1361.0085 -1825.5108 2027.9108
441 101.2 -1172.84408 1375.2441 -1847.2823 2049.6823
442 101.2 -1186.92234 1389.3223 -1868.8131 2071.2131
443 101.2 -1200.84839 1403.2484 -1890.1112 2092.5112
444 101.2 -1214.62707 1417.0271 -1911.1838 2113.5838
445 101.2 -1228.26294 1430.6629 -1932.0381 2134.4381
446 101.2 -1241.76037 1444.1604 -1952.6807 2155.0807
447 101.2 -1255.12349 1457.5235 -1973.1178 2175.5178
448 101.2 -1268.35623 1470.7562 -1993.3555 2195.7555
449 101.2 -1281.46233 1483.8623 -2013.3996 2215.7996
450 101.2 -1294.44536 1496.8454 -2033.2554 2235.6554
451 101.2 -1307.30872 1509.7087 -2052.9282 2255.3282
452 101.2 -1320.05567 1522.4557 -2072.4230 2274.8230
453 101.2 -1332.68930 1535.0893 -2091.7444 2294.1444
454 101.2 -1345.21259 1547.6126 -2110.8972 2313.2972
455 101.2 -1357.62838 1560.0284 -2129.8855 2332.2855
456 101.2 -1369.93938 1572.3394 -2148.7135 2351.1135
457 101.2 -1382.14822 1584.5482 -2167.3853 2369.7853
458 101.2 -1394.25738 1596.6574 -2185.9047 2388.3047
459 101.2 -1406.26928 1608.6693 -2204.2753 2406.6753
460 101.2 -1418.18622 1620.5862 -2222.5007 2424.9007
461 101.2 -1430.01042 1632.4104 -2240.5842 2442.9842
462 101.2 -1441.74400 1644.1440 -2258.5292 2460.9292
463 101.2 -1453.38903 1655.7890 -2276.3387 2478.7387
464 101.2 -1464.94747 1667.3475 -2294.0159 2496.4159
465 101.2 -1476.42123 1678.8212 -2311.5635 2513.9635
466 101.2 -1487.81214 1690.2121 -2328.9844 2531.3844
467 101.2 -1499.12198 1701.5220 -2346.2813 2548.6813
468 101.2 -1510.35245 1712.7524 -2363.4568 2565.8568
469 101.2 -1521.50520 1723.9052 -2380.5134 2582.9134
470 101.2 -1532.58181 1734.9818 -2397.4537 2599.8537
471 101.2 -1543.58383 1745.9838 -2414.2798 2616.6798
472 101.2 -1554.51275 1756.9127 -2430.9941 2633.3941
473 101.2 -1565.37000 1767.7700 -2447.5989 2649.9989
474 101.2 -1576.15697 1778.5570 -2464.0961 2666.4961
475 101.2 -1586.87502 1789.2750 -2480.4879 2682.8879
476 101.2 -1597.52544 1799.9254 -2496.7764 2699.1764
477 101.2 -1608.10950 1810.5095 -2512.9633 2715.3633
478 101.2 -1618.62843 1821.0284 -2529.0506 2731.4506
479 101.2 -1629.08341 1831.4834 -2545.0401 2747.4401
480 101.2 -1639.47559 1841.8756 -2560.9336 2763.3336
481 101.2 -1649.80610 1852.2061 -2576.7327 2779.1327
482 101.2 -1660.07602 1862.4760 -2592.4392 2794.8392
483 101.2 -1670.28640 1872.6864 -2608.0547 2810.4547
484 101.2 -1680.43827 1882.8383 -2623.5806 2825.9806
485 101.2 -1690.53262 1892.9326 -2639.0186 2841.4186
486 101.2 -1700.57041 1902.9704 -2654.3701 2856.7701
487 101.2 -1710.55260 1912.9526 -2669.6365 2872.0365
488 101.2 -1720.48008 1922.8801 -2684.8193 2887.2193
489 101.2 -1730.35376 1932.7538 -2699.9198 2902.3198
490 101.2 -1740.17449 1942.5745 -2714.9393 2917.3393
491 101.2 -1749.94313 1952.3431 -2729.8791 2932.2791
492 101.2 -1759.66048 1962.0605 -2744.7405 2947.1405
493 101.2 -1769.32735 1971.7274 -2759.5247 2961.9247
5.7 Seasonal Naive method
snaive_model <- snaive(trainingrf, h = length(validationrf))
summary(snaive_model)
Forecast method: Seasonal naive method
Model Information:
Call: snaive(y = trainingrf, h = length(validationrf))
Residual sd: 148.1979
Error measures:
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.255443 148.1979 109.2408 -163.5134 208.4867 1 -0.4617884
Forecasts:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
397 101.2 -88.72328 291.1233 -189.2626 391.6626
398 101.2 -167.39207 369.7921 -309.5761 511.9761
399 101.2 -227.75677 430.1568 -401.8960 604.2960
400 101.2 -278.64656 481.0466 -479.7252 682.1252
401 101.2 -323.48136 525.8814 -548.2941 750.6941
402 101.2 -364.01512 566.4151 -610.2851 812.6851
403 101.2 -401.28976 603.6898 -667.2918 869.6918
404 101.2 -435.98415 638.3841 -720.3523 922.7523
405 101.2 -468.56983 670.9698 -770.1878 972.5878
406 101.2 -499.39014 701.7901 -817.3234 1019.7234
407 101.2 -528.70425 731.1043 -862.1554 1064.5554
408 101.2 -556.71353 759.1135 -904.9919 1107.3919
409 101.2 -583.57812 785.9781 -946.0778 1148.4778
410 101.2 -609.42783 811.8278 -985.6115 1188.0115
411 101.2 -634.36969 836.7697 -1023.7568 1226.1568
412 101.2 -658.49311 860.8931 -1060.6504 1263.0504
413 101.2 -681.87373 884.2737 -1096.4079 1298.8079
414 101.2 -704.57622 906.9762 -1131.1284 1333.5284
415 101.2 -726.65637 929.0564 -1164.8971 1367.2971
416 101.2 -748.16272 950.5627 -1197.7882 1400.1882
417 101.2 -769.13780 971.5378 -1229.8668 1432.2668
418 101.2 -789.61913 992.0191 -1261.1903 1463.5903
419 101.2 -809.64004 1012.0400 -1291.8096 1494.2096
420 101.2 -829.23024 1031.6302 -1321.7703 1524.1703
421 101.2 -848.41639 1050.8164 -1351.1129 1553.5129
422 101.2 -867.22250 1069.6225 -1379.8744 1582.2744
423 101.2 -885.67030 1088.0703 -1408.0879 1610.4879
424 101.2 -903.77952 1106.1795 -1435.7835 1638.1835
425 101.2 -921.56815 1123.9682 -1462.9889 1665.3889
426 101.2 -939.05263 1141.4526 -1489.7291 1692.1291
427 101.2 -956.24806 1158.6481 -1516.0272 1718.4272
428 101.2 -973.16830 1175.5683 -1541.9045 1744.3045
429 101.2 -989.82617 1192.2262 -1567.3805 1769.7805
430 101.2 -1006.23350 1208.6335 -1592.4734 1794.8734
431 101.2 -1022.40126 1224.8013 -1617.1998 1819.5998
432 101.2 -1038.33967 1240.7397 -1641.5755 1843.9755
433 101.2 -1054.05820 1256.4582 -1665.6149 1868.0149
434 101.2 -1069.56571 1271.9657 -1689.3316 1891.7316
435 101.2 -1084.87049 1287.2705 -1712.7383 1915.1383
436 101.2 -1099.98028 1302.3803 -1735.8467 1938.2467
437 101.2 -1114.90234 1317.3023 -1758.6680 1961.0680
438 101.2 -1129.64351 1332.0435 -1781.2127 1983.6127
439 101.2 -1144.21022 1346.6102 -1803.4906 2005.8906
440 101.2 -1158.60850 1361.0085 -1825.5108 2027.9108
441 101.2 -1172.84408 1375.2441 -1847.2823 2049.6823
442 101.2 -1186.92234 1389.3223 -1868.8131 2071.2131
443 101.2 -1200.84839 1403.2484 -1890.1112 2092.5112
444 101.2 -1214.62707 1417.0271 -1911.1838 2113.5838
445 101.2 -1228.26294 1430.6629 -1932.0381 2134.4381
446 101.2 -1241.76037 1444.1604 -1952.6807 2155.0807
447 101.2 -1255.12349 1457.5235 -1973.1178 2175.5178
448 101.2 -1268.35623 1470.7562 -1993.3555 2195.7555
449 101.2 -1281.46233 1483.8623 -2013.3996 2215.7996
450 101.2 -1294.44536 1496.8454 -2033.2554 2235.6554
451 101.2 -1307.30872 1509.7087 -2052.9282 2255.3282
452 101.2 -1320.05567 1522.4557 -2072.4230 2274.8230
453 101.2 -1332.68930 1535.0893 -2091.7444 2294.1444
454 101.2 -1345.21259 1547.6126 -2110.8972 2313.2972
455 101.2 -1357.62838 1560.0284 -2129.8855 2332.2855
456 101.2 -1369.93938 1572.3394 -2148.7135 2351.1135
457 101.2 -1382.14822 1584.5482 -2167.3853 2369.7853
458 101.2 -1394.25738 1596.6574 -2185.9047 2388.3047
459 101.2 -1406.26928 1608.6693 -2204.2753 2406.6753
460 101.2 -1418.18622 1620.5862 -2222.5007 2424.9007
461 101.2 -1430.01042 1632.4104 -2240.5842 2442.9842
462 101.2 -1441.74400 1644.1440 -2258.5292 2460.9292
463 101.2 -1453.38903 1655.7890 -2276.3387 2478.7387
464 101.2 -1464.94747 1667.3475 -2294.0159 2496.4159
465 101.2 -1476.42123 1678.8212 -2311.5635 2513.9635
466 101.2 -1487.81214 1690.2121 -2328.9844 2531.3844
467 101.2 -1499.12198 1701.5220 -2346.2813 2548.6813
468 101.2 -1510.35245 1712.7524 -2363.4568 2565.8568
469 101.2 -1521.50520 1723.9052 -2380.5134 2582.9134
470 101.2 -1532.58181 1734.9818 -2397.4537 2599.8537
471 101.2 -1543.58383 1745.9838 -2414.2798 2616.6798
472 101.2 -1554.51275 1756.9127 -2430.9941 2633.3941
473 101.2 -1565.37000 1767.7700 -2447.5989 2649.9989
474 101.2 -1576.15697 1778.5570 -2464.0961 2666.4961
475 101.2 -1586.87502 1789.2750 -2480.4879 2682.8879
476 101.2 -1597.52544 1799.9254 -2496.7764 2699.1764
477 101.2 -1608.10950 1810.5095 -2512.9633 2715.3633
478 101.2 -1618.62843 1821.0284 -2529.0506 2731.4506
479 101.2 -1629.08341 1831.4834 -2545.0401 2747.4401
480 101.2 -1639.47559 1841.8756 -2560.9336 2763.3336
481 101.2 -1649.80610 1852.2061 -2576.7327 2779.1327
482 101.2 -1660.07602 1862.4760 -2592.4392 2794.8392
483 101.2 -1670.28640 1872.6864 -2608.0547 2810.4547
484 101.2 -1680.43827 1882.8383 -2623.5806 2825.9806
485 101.2 -1690.53262 1892.9326 -2639.0186 2841.4186
486 101.2 -1700.57041 1902.9704 -2654.3701 2856.7701
487 101.2 -1710.55260 1912.9526 -2669.6365 2872.0365
488 101.2 -1720.48008 1922.8801 -2684.8193 2887.2193
489 101.2 -1730.35376 1932.7538 -2699.9198 2902.3198
490 101.2 -1740.17449 1942.5745 -2714.9393 2917.3393
491 101.2 -1749.94313 1952.3431 -2729.8791 2932.2791
492 101.2 -1759.66048 1962.0605 -2744.7405 2947.1405
493 101.2 -1769.32735 1971.7274 -2759.5247 2961.9247
5.8 State Space Models
ets_modelrf <- ets(trainingrf, allow.multiplicative.trend = TRUE)
summary(ets_modelrf)ETS(A,N,N)
Call:
ets(y = trainingrf, allow.multiplicative.trend = TRUE)
Smoothing parameters:
alpha = 1e-04
Initial states:
l = 177.9223
sigma: 114.7456
AIC AICc BIC
6128.867 6128.928 6140.811
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 0.004551363 114.4554 87.41135 -454.4325 480.2861 0.8001716
ACF1
Training set 0.1601591
additive errors No trend No seasonality
ets_forecastrf <- forecast(ets_modelrf, h=length(validationrf))
ets_forecastrf Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
397 177.9225 30.87011 324.9748 -46.97469 402.8196
398 177.9225 30.87011 324.9748 -46.97469 402.8196
399 177.9225 30.87011 324.9748 -46.97469 402.8196
400 177.9225 30.87011 324.9748 -46.97470 402.8196
401 177.9225 30.87011 324.9748 -46.97470 402.8196
402 177.9225 30.87011 324.9748 -46.97470 402.8196
403 177.9225 30.87011 324.9748 -46.97470 402.8196
404 177.9225 30.87011 324.9748 -46.97470 402.8196
405 177.9225 30.87011 324.9748 -46.97470 402.8196
406 177.9225 30.87011 324.9748 -46.97470 402.8196
407 177.9225 30.87011 324.9748 -46.97470 402.8196
408 177.9225 30.87011 324.9748 -46.97470 402.8196
409 177.9225 30.87011 324.9748 -46.97471 402.8196
410 177.9225 30.87010 324.9748 -46.97471 402.8196
411 177.9225 30.87010 324.9748 -46.97471 402.8196
412 177.9225 30.87010 324.9748 -46.97471 402.8196
413 177.9225 30.87010 324.9748 -46.97471 402.8196
414 177.9225 30.87010 324.9748 -46.97471 402.8196
415 177.9225 30.87010 324.9748 -46.97471 402.8196
416 177.9225 30.87010 324.9748 -46.97471 402.8196
417 177.9225 30.87010 324.9748 -46.97471 402.8196
418 177.9225 30.87010 324.9748 -46.97472 402.8196
419 177.9225 30.87010 324.9748 -46.97472 402.8196
420 177.9225 30.87010 324.9748 -46.97472 402.8196
421 177.9225 30.87010 324.9748 -46.97472 402.8196
422 177.9225 30.87010 324.9748 -46.97472 402.8196
423 177.9225 30.87010 324.9748 -46.97472 402.8196
424 177.9225 30.87009 324.9748 -46.97472 402.8196
425 177.9225 30.87009 324.9748 -46.97472 402.8196
426 177.9225 30.87009 324.9748 -46.97472 402.8196
427 177.9225 30.87009 324.9748 -46.97473 402.8196
428 177.9225 30.87009 324.9748 -46.97473 402.8196
429 177.9225 30.87009 324.9748 -46.97473 402.8196
430 177.9225 30.87009 324.9748 -46.97473 402.8196
431 177.9225 30.87009 324.9748 -46.97473 402.8196
432 177.9225 30.87009 324.9748 -46.97473 402.8196
433 177.9225 30.87009 324.9748 -46.97473 402.8196
434 177.9225 30.87009 324.9748 -46.97473 402.8196
435 177.9225 30.87009 324.9748 -46.97473 402.8196
436 177.9225 30.87009 324.9748 -46.97474 402.8196
437 177.9225 30.87009 324.9748 -46.97474 402.8196
438 177.9225 30.87008 324.9748 -46.97474 402.8196
439 177.9225 30.87008 324.9748 -46.97474 402.8196
440 177.9225 30.87008 324.9748 -46.97474 402.8196
441 177.9225 30.87008 324.9748 -46.97474 402.8196
442 177.9225 30.87008 324.9748 -46.97474 402.8197
443 177.9225 30.87008 324.9748 -46.97474 402.8197
444 177.9225 30.87008 324.9748 -46.97474 402.8197
445 177.9225 30.87008 324.9748 -46.97475 402.8197
446 177.9225 30.87008 324.9748 -46.97475 402.8197
447 177.9225 30.87008 324.9748 -46.97475 402.8197
448 177.9225 30.87008 324.9748 -46.97475 402.8197
449 177.9225 30.87008 324.9748 -46.97475 402.8197
450 177.9225 30.87008 324.9748 -46.97475 402.8197
451 177.9225 30.87007 324.9748 -46.97475 402.8197
452 177.9225 30.87007 324.9748 -46.97475 402.8197
453 177.9225 30.87007 324.9748 -46.97475 402.8197
454 177.9225 30.87007 324.9748 -46.97476 402.8197
455 177.9225 30.87007 324.9748 -46.97476 402.8197
456 177.9225 30.87007 324.9748 -46.97476 402.8197
457 177.9225 30.87007 324.9748 -46.97476 402.8197
458 177.9225 30.87007 324.9748 -46.97476 402.8197
459 177.9225 30.87007 324.9748 -46.97476 402.8197
460 177.9225 30.87007 324.9748 -46.97476 402.8197
461 177.9225 30.87007 324.9748 -46.97476 402.8197
462 177.9225 30.87007 324.9748 -46.97477 402.8197
463 177.9225 30.87007 324.9748 -46.97477 402.8197
464 177.9225 30.87007 324.9748 -46.97477 402.8197
465 177.9225 30.87006 324.9748 -46.97477 402.8197
466 177.9225 30.87006 324.9748 -46.97477 402.8197
467 177.9225 30.87006 324.9748 -46.97477 402.8197
468 177.9225 30.87006 324.9748 -46.97477 402.8197
469 177.9225 30.87006 324.9748 -46.97477 402.8197
470 177.9225 30.87006 324.9748 -46.97477 402.8197
471 177.9225 30.87006 324.9748 -46.97478 402.8197
472 177.9225 30.87006 324.9748 -46.97478 402.8197
473 177.9225 30.87006 324.9748 -46.97478 402.8197
474 177.9225 30.87006 324.9749 -46.97478 402.8197
475 177.9225 30.87006 324.9749 -46.97478 402.8197
476 177.9225 30.87006 324.9749 -46.97478 402.8197
477 177.9225 30.87006 324.9749 -46.97478 402.8197
478 177.9225 30.87005 324.9749 -46.97478 402.8197
479 177.9225 30.87005 324.9749 -46.97478 402.8197
480 177.9225 30.87005 324.9749 -46.97479 402.8197
481 177.9225 30.87005 324.9749 -46.97479 402.8197
482 177.9225 30.87005 324.9749 -46.97479 402.8197
483 177.9225 30.87005 324.9749 -46.97479 402.8197
484 177.9225 30.87005 324.9749 -46.97479 402.8197
485 177.9225 30.87005 324.9749 -46.97479 402.8197
486 177.9225 30.87005 324.9749 -46.97479 402.8197
487 177.9225 30.87005 324.9749 -46.97479 402.8197
488 177.9225 30.87005 324.9749 -46.97479 402.8197
489 177.9225 30.87005 324.9749 -46.97480 402.8197
490 177.9225 30.87005 324.9749 -46.97480 402.8197
491 177.9225 30.87005 324.9749 -46.97480 402.8197
492 177.9225 30.87004 324.9749 -46.97480 402.8197
493 177.9225 30.87004 324.9749 -46.97480 402.8197
5.9 Holt-Winters
5.10 ARIMA
arima_optimal <- auto.arima(trainingrf)
summary(arima_optimal)Series: trainingrf
ARIMA(4,0,2) with non-zero mean
Coefficients:
ar1 ar2 ar3 ar4 ma1 ma2 mean
0.1935 0.9521 -0.2109 -0.0492 -0.0349 -0.8948 179.6007
s.e. 0.0772 0.0710 0.0513 0.0547 0.0587 0.0536 3.6884
sigma^2 = 12584: log likelihood = -2427.68
AIC=4871.36 AICc=4871.73 BIC=4903.21
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set -0.6514072 111.181 84.55129 -410.529 435.8642 0.7739903
ACF1
Training set -0.0008009035
6 Shiny Dashboard Prototype - UI and Server Design
6.1 UI
6.1.1 Sketch
6.1.2 Details
6.2 Server
6.2.1 Details
7 References
https://michaelminn.net/tutorials/r-weather/index.html https://towardsdatascience.com/a-guide-to-forecasting-in-r-6b0c9638c261 https://www.singstat.gov.sg/-/media/files/publications/reference/ssnsep05-pg11-14.ashx https://otexts.com/fpp2/estimation-and-model-selection.html